Example of clustering with a Dirichlet Process Mixture Model using Gaussian components
In [1]:
using DataFrames

# number of samples per cluster
N = 100

# create the first cluster: N samples from randn for both x and y
Data = DataFrame(x = randn(N), y = randn(N), class = "cluster1")

# append the second cluster: same kind of samples, shifted by 5 along both axes
# (`.+` broadcasts the scalar offset over the sample vector)
append!(Data, DataFrame(x = randn(N) .+ 5, y = randn(N) .+ 5, class = "cluster2"));
using BNP

D = 2              # 2 dimensional data
N = size(Data, 1)  # number of data points, taken from the data frame itself

# data matrix: one row per dimension, one column per data point
X = zeros(D, N)
X[1, :] = convert(Array, Data[:x])
X[2, :] = convert(Array, Data[:y])

# init base distribution parameters
mu0 = vec(mean(X, 2))  # mean of the data along each dimension
kappa0 = 9.0
nu0 = 5.0
Sigma0 = eye(D) * 10

# base distribution (Gaussian with Normal Inverse Wishart Prior)
H = GaussianWishart(mu0, kappa0, nu0, Sigma0)

# train Dirichlet Process Mixture Model with Gibbs() and RandomInitialisation(k = 10)
result = train(DPM(H), Gibbs(), RandomInitialisation(k = 10), X);
Visualize the inferred models
In [5]:
using Interact

# per-point cluster label, remapped to 1..(number of distinct assignments)
K = zeros(Int, N)

# interactively loop over all iterations
# NOTE(review): `plot` is not brought into scope by any visible cell; presumably a
# plotting package (e.g. Gadfly) is loaded in a cell not shown here -- confirm.
@manipulate for iteration = 1:size(result, 1)
    # assignments of the selected iteration, looked up once instead of per point
    Z = result[iteration].Z
    idx = unique(Z)

    # replace each raw assignment by its position among the distinct values
    for n in 1:N
        K[n] = findfirst(idx .== Z[n])
    end

    plot(x = X[1, :], y = X[2, :], color = K)
end
# number of clusters: count of distinct assignments in each stored iteration
C = [length(unique(x.Z)) for x in result]

# alpha parameter of each stored iteration
A = map(x -> x.α, result)

# shared x axis for both plots
iterations = collect(1:size(result, 1))

# plot
# NOTE(review): `plot`, `Geom`, `Guide` and `vstack` are not brought into scope by any
# visible cell; presumably Gadfly is loaded in a cell not shown here -- confirm.
p1 = plot(x = iterations, y = C, Geom.line,
          Guide.xlabel("iteration"),
          Guide.ylabel("number of clusters", orientation = :vertical))
p2 = plot(x = iterations, y = A, Geom.line,
          Guide.xlabel("iteration"),
          Guide.ylabel("alpha", orientation = :vertical))

# stack together
vstack(p1, p2)